PAC - Global YouTube Statistics 2023
GRUPO 3:
- Elvis Dany Valentín Victorino
- Jonathan Jerry Castillo Rodríguez
- Juan Carlos Guerra Sandoval
- Raúl Raico Gallardo
Carga del Dataset
# Load the raw CSV export into `df`. Per the read.table documentation,
# `encoding` only marks the strings as UTF-8; it does not re-encode the input.
df <- read.csv(
  file = file.path("data", "Global YouTube Statistics.csv"),
  sep = ",",
  encoding = "UTF-8"
)
Exploración del DataFrame
Variables
# List the column names of the loaded data frame.
names(df)
## [1] "rank"
## [2] "Youtuber"
## [3] "subscribers"
## [4] "video.views"
## [5] "category"
## [6] "Title"
## [7] "uploads"
## [8] "Country"
## [9] "Abbreviation"
## [10] "channel_type"
## [11] "video_views_rank"
## [12] "country_rank"
## [13] "channel_type_rank"
## [14] "video_views_for_the_last_30_days"
## [15] "lowest_monthly_earnings"
## [16] "highest_monthly_earnings"
## [17] "lowest_yearly_earnings"
## [18] "highest_yearly_earnings"
## [19] "subscribers_for_last_30_days"
## [20] "created_year"
## [21] "created_month"
## [22] "created_date"
## [23] "Gross.tertiary.education.enrollment...."
## [24] "Population"
## [25] "Unemployment.rate"
## [26] "Urban_population"
## [27] "Latitude"
## [28] "Longitude"
Contenido superior de DataFrame
# Show the first six rows of the data frame.
head(df)
## rank Youtuber subscribers video.views category
## 1 1 T-Series 245000000 228000000000 Music
## 2 2 YouTube Movies 170000000 0 Film & Animation
## 3 3 MrBeast 166000000 28368841870 Entertainment
## 4 4 Cocomelon - Nursery Rhymes 162000000 164000000000 Education
## 5 5 SET India 159000000 148000000000 Shows
## 6 6 Music 119000000 0 nan
## Title uploads Country Abbreviation channel_type
## 1 T-Series 20082 India IN Music
## 2 youtubemovies 1 United States US Games
## 3 MrBeast 741 United States US Entertainment
## 4 Cocomelon - Nursery Rhymes 966 United States US Education
## 5 SET India 116536 India IN Entertainment
## 6 Music 0 nan nan Music
## video_views_rank country_rank channel_type_rank
## 1 1 1 1
## 2 4055159 7670 7423
## 3 48 1 1
## 4 2 2 1
## 5 3 2 2
## 6 4057944 NaN NaN
## video_views_for_the_last_30_days lowest_monthly_earnings
## 1 2.258e+09 564600
## 2 1.200e+01 0
## 3 1.348e+09 337000
## 4 1.975e+09 493800
## 5 1.824e+09 455900
## 6 NaN 0
## highest_monthly_earnings lowest_yearly_earnings highest_yearly_earnings
## 1 9.0e+06 6.8e+06 1.084e+08
## 2 5.0e-02 4.0e-02 5.800e-01
## 3 5.4e+06 4.0e+06 6.470e+07
## 4 7.9e+06 5.9e+06 9.480e+07
## 5 7.3e+06 5.5e+06 8.750e+07
## 6 0.0e+00 0.0e+00 0.000e+00
## subscribers_for_last_30_days created_year created_month created_date
## 1 2e+06 2006 Mar 13
## 2 NaN 2006 Mar 5
## 3 8e+06 2012 Feb 20
## 4 1e+06 2006 Sep 1
## 5 1e+06 2006 Sep 20
## 6 NaN 2013 Sep 24
## Gross.tertiary.education.enrollment.... Population Unemployment.rate
## 1 28.1 1366417754 5.36
## 2 88.2 328239523 14.70
## 3 88.2 328239523 14.70
## 4 88.2 328239523 14.70
## 5 28.1 1366417754 5.36
## 6 NaN NaN NaN
## Urban_population Latitude Longitude
## 1 471031528 20.59368 78.96288
## 2 270663028 37.09024 -95.71289
## 3 270663028 37.09024 -95.71289
## 4 270663028 37.09024 -95.71289
## 5 471031528 20.59368 78.96288
## 6 NaN NaN NaN
Filas y columnas del DataFrame
# Number of rows and columns of the data frame.
dim(df)
## [1] 995 28
Resumen Estadístico
# Per-column summary statistics (quartiles, mean, NA counts).
summary(df)
## rank Youtuber subscribers video.views
## Min. : 1.0 Length:995 Min. : 12300000 Min. :0.000e+00
## 1st Qu.:249.5 Class :character 1st Qu.: 14500000 1st Qu.:4.288e+09
## Median :498.0 Mode :character Median : 17700000 Median :7.761e+09
## Mean :498.0 Mean : 22982412 Mean :1.104e+10
## 3rd Qu.:746.5 3rd Qu.: 24600000 3rd Qu.:1.355e+10
## Max. :995.0 Max. :245000000 Max. :2.280e+11
##
## category Title uploads Country
## Length:995 Length:995 Min. : 0.0 Length:995
## Class :character Class :character 1st Qu.: 194.5 Class :character
## Mode :character Mode :character Median : 729.0 Mode :character
## Mean : 9187.1
## 3rd Qu.: 2667.5
## Max. :301308.0
##
## Abbreviation channel_type video_views_rank country_rank
## Length:995 Length:995 Min. : 1 Min. : 1.0
## Class :character Class :character 1st Qu.: 323 1st Qu.: 11.0
## Mode :character Mode :character Median : 916 Median : 51.0
## Mean : 554249 Mean : 386.1
## 3rd Qu.: 3584 3rd Qu.: 123.0
## Max. :4057944 Max. :7741.0
## NA's :1 NA's :116
## channel_type_rank video_views_for_the_last_30_days lowest_monthly_earnings
## Min. : 1.0 Min. :1.000e+00 Min. : 0
## 1st Qu.: 27.0 1st Qu.:2.014e+07 1st Qu.: 2700
## Median : 65.5 Median :6.408e+07 Median : 13300
## Mean : 745.7 Mean :1.756e+08 Mean : 36886
## 3rd Qu.: 139.8 3rd Qu.:1.688e+08 3rd Qu.: 37900
## Max. :7741.0 Max. :6.589e+09 Max. :850900
## NA's :33 NA's :56
## highest_monthly_earnings lowest_yearly_earnings highest_yearly_earnings
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 43500 1st Qu.: 32650 1st Qu.: 521750
## Median : 212700 Median : 159500 Median : 2600000
## Mean : 589808 Mean : 442257 Mean : 7081814
## 3rd Qu.: 606800 3rd Qu.: 455100 3rd Qu.: 7300000
## Max. :13600000 Max. :10200000 Max. :163400000
##
## subscribers_for_last_30_days created_year created_month created_date
## Min. : 1 Min. :1970 Length:995 Min. : 1.00
## 1st Qu.: 100000 1st Qu.:2009 Class :character 1st Qu.: 8.00
## Median : 200000 Median :2013 Mode :character Median :16.00
## Mean : 349079 Mean :2013 Mean :15.75
## 3rd Qu.: 400000 3rd Qu.:2016 3rd Qu.:23.00
## Max. :8000000 Max. :2022 Max. :31.00
## NA's :337 NA's :5 NA's :5
## Gross.tertiary.education.enrollment.... Population Unemployment.rate
## Min. : 7.60 Min. :2.025e+05 Min. : 0.750
## 1st Qu.: 36.30 1st Qu.:8.336e+07 1st Qu.: 5.270
## Median : 68.00 Median :3.282e+08 Median : 9.365
## Mean : 63.63 Mean :4.304e+08 Mean : 9.279
## 3rd Qu.: 88.20 3rd Qu.:3.282e+08 3rd Qu.:14.700
## Max. :113.10 Max. :1.398e+09 Max. :14.720
## NA's :123 NA's :123 NA's :123
## Urban_population Latitude Longitude
## Min. : 35588 Min. :-38.42 Min. :-172.10
## 1st Qu.: 55908316 1st Qu.: 20.59 1st Qu.: -95.71
## Median :270663028 Median : 37.09 Median : -51.93
## Mean :224214982 Mean : 26.63 Mean : -14.13
## 3rd Qu.:270663028 3rd Qu.: 37.09 3rd Qu.: 78.96
## Max. :842933962 Max. : 61.92 Max. : 138.25
## NA's :123 NA's :123 NA's :123
Estructura del DataFrame
# Compact view of every column's type and first values.
str(df)
## 'data.frame': 995 obs. of 28 variables:
## $ rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Youtuber : chr "T-Series" "YouTube Movies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ subscribers : int 245000000 170000000 166000000 162000000 159000000 119000000 112000000 111000000 106000000 98900000 ...
## $ video.views : num 2.28e+11 0.00 2.84e+10 1.64e+11 1.48e+11 ...
## $ category : chr "Music" "Film & Animation" "Entertainment" "Education" ...
## $ Title : chr "T-Series" "youtubemovies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ uploads : int 20082 1 741 966 116536 0 1111 4716 493 574 ...
## $ Country : chr "India" "United States" "United States" "United States" ...
## $ Abbreviation : chr "IN" "US" "US" "US" ...
## $ channel_type : chr "Music" "Games" "Entertainment" "Education" ...
## $ video_views_rank : int 1 4055159 48 2 3 4057944 5 44 630 8 ...
## $ country_rank : num 1 7670 1 2 2 NaN 3 1 5 5 ...
## $ channel_type_rank : num 1 7423 1 1 2 ...
## $ video_views_for_the_last_30_days : num 2.26e+09 1.20e+01 1.35e+09 1.98e+09 1.82e+09 ...
## $ lowest_monthly_earnings : num 564600 0 337000 493800 455900 ...
## $ highest_monthly_earnings : num 9.0e+06 5.0e-02 5.4e+06 7.9e+06 7.3e+06 ...
## $ lowest_yearly_earnings : num 6.8e+06 4.0e-02 4.0e+06 5.9e+06 5.5e+06 ...
## $ highest_yearly_earnings : num 1.08e+08 5.80e-01 6.47e+07 9.48e+07 8.75e+07 ...
## $ subscribers_for_last_30_days : num 2e+06 NaN 8e+06 1e+06 1e+06 NaN NaN NaN 1e+05 6e+05 ...
## $ created_year : num 2006 2006 2012 2006 2006 ...
## $ created_month : chr "Mar" "Mar" "Feb" "Sep" ...
## $ created_date : num 13 5 20 1 20 24 12 29 14 23 ...
## $ Gross.tertiary.education.enrollment....: num 28.1 88.2 88.2 88.2 28.1 NaN 88.2 63.2 81.9 88.2 ...
## $ Population : num 1.37e+09 3.28e+08 3.28e+08 3.28e+08 1.37e+09 ...
## $ Unemployment.rate : num 5.36 14.7 14.7 14.7 5.36 NaN 14.7 2.29 4.59 14.7 ...
## $ Urban_population : num 4.71e+08 2.71e+08 2.71e+08 2.71e+08 4.71e+08 ...
## $ Latitude : num 20.6 37.1 37.1 37.1 20.6 ...
## $ Longitude : num 79 -95.7 -95.7 -95.7 79 ...
Valores no nulos por columna
# Count the non-missing entries of every column. vapply() pins the result to
# exactly one integer per column, so the return type cannot silently change
# with the shape of the input the way sapply() can.
non_null_count <- vapply(df, function(x) sum(!is.na(x)), integer(1))
non_null_count
## rank Youtuber
## 995 995
## subscribers video.views
## 995 995
## category Title
## 995 995
## uploads Country
## 995 995
## Abbreviation channel_type
## 995 995
## video_views_rank country_rank
## 994 879
## channel_type_rank video_views_for_the_last_30_days
## 962 939
## lowest_monthly_earnings highest_monthly_earnings
## 995 995
## lowest_yearly_earnings highest_yearly_earnings
## 995 995
## subscribers_for_last_30_days created_year
## 658 990
## created_month created_date
## 995 990
## Gross.tertiary.education.enrollment.... Population
## 872 872
## Unemployment.rate Urban_population
## 872 872
## Latitude Longitude
## 872 872
Matriz de valores faltantes (nulos)
# Plot the missing-value matrix of the full data frame.
library(naniar)
naniar::vis_miss(df)

Nuevo DataFrame: df2 (sin variables con valores nulos o vacíos)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the columns that contain no NA and no empty string.
# select_if() is superseded, so the predicate goes through select(where()).
# The predicate is written with the scalar `&&` because it must yield a
# single TRUE/FALSE per column.
# Literal "nan" strings are not NA, so text columns such as category and
# Country are kept here and have to be filtered later.
df2 <- df %>%
  select(where(~ !anyNA(.x) && all(.x != "")))
Matriz de valores faltantes de df2
# Plot the missing-value matrix again, this time for the reduced data frame.
naniar::vis_miss(df2)

# Show the structure of the reduced data frame.
str(df2)
## 'data.frame': 995 obs. of 15 variables:
## $ rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Youtuber : chr "T-Series" "YouTube Movies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ subscribers : int 245000000 170000000 166000000 162000000 159000000 119000000 112000000 111000000 106000000 98900000 ...
## $ video.views : num 2.28e+11 0.00 2.84e+10 1.64e+11 1.48e+11 ...
## $ category : chr "Music" "Film & Animation" "Entertainment" "Education" ...
## $ Title : chr "T-Series" "youtubemovies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ uploads : int 20082 1 741 966 116536 0 1111 4716 493 574 ...
## $ Country : chr "India" "United States" "United States" "United States" ...
## $ Abbreviation : chr "IN" "US" "US" "US" ...
## $ channel_type : chr "Music" "Games" "Entertainment" "Education" ...
## $ lowest_monthly_earnings : num 564600 0 337000 493800 455900 ...
## $ highest_monthly_earnings: num 9.0e+06 5.0e-02 5.4e+06 7.9e+06 7.3e+06 ...
## $ lowest_yearly_earnings : num 6.8e+06 4.0e-02 4.0e+06 5.9e+06 5.5e+06 ...
## $ highest_yearly_earnings : num 1.08e+08 5.80e-01 6.47e+07 9.48e+07 8.75e+07 ...
## $ created_month : chr "Mar" "Mar" "Feb" "Sep" ...
Nuevo DataFrame: df3 (con promedio de ganancias anuales)
# df3 starts as a copy of df2.
df3 <- df2
# Add the average of the highest and lowest yearly earnings of each channel.
df3[["promedio_yearly_earnings"]] <- rowMeans(
  df3[, c("highest_yearly_earnings", "lowest_yearly_earnings")],
  na.rm = TRUE
)
Estructura de df3
# Show the structure of df3, including the new earnings column.
str(df3)
## 'data.frame': 995 obs. of 16 variables:
## $ rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Youtuber : chr "T-Series" "YouTube Movies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ subscribers : int 245000000 170000000 166000000 162000000 159000000 119000000 112000000 111000000 106000000 98900000 ...
## $ video.views : num 2.28e+11 0.00 2.84e+10 1.64e+11 1.48e+11 ...
## $ category : chr "Music" "Film & Animation" "Entertainment" "Education" ...
## $ Title : chr "T-Series" "youtubemovies" "MrBeast" "Cocomelon - Nursery Rhymes" ...
## $ uploads : int 20082 1 741 966 116536 0 1111 4716 493 574 ...
## $ Country : chr "India" "United States" "United States" "United States" ...
## $ Abbreviation : chr "IN" "US" "US" "US" ...
## $ channel_type : chr "Music" "Games" "Entertainment" "Education" ...
## $ lowest_monthly_earnings : num 564600 0 337000 493800 455900 ...
## $ highest_monthly_earnings: num 9.0e+06 5.0e-02 5.4e+06 7.9e+06 7.3e+06 ...
## $ lowest_yearly_earnings : num 6.8e+06 4.0e-02 4.0e+06 5.9e+06 5.5e+06 ...
## $ highest_yearly_earnings : num 1.08e+08 5.80e-01 6.47e+07 9.48e+07 8.75e+07 ...
## $ created_month : chr "Mar" "Mar" "Feb" "Sep" ...
## $ promedio_yearly_earnings: num 5.76e+07 3.10e-01 3.44e+07 5.04e+07 4.65e+07 ...
Gráfico de Tendencias (Dispersión)
Gráfico de Dispersión del Promedio de ganancias anuales por número de
vistas
# Carga la librerÃa
library(ggplot2)
# Scatter plot with a linear fit: total video views vs. average yearly earnings.
plot1 <- ggplot(
  data = df3,
  mapping = aes(x = video.views, y = promedio_yearly_earnings)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(title = "Tendencia entre \n Vistas de Videos y \n Ganancias Anuales") +
  theme_minimal()

# Scatter plot with a linear fit: number of uploads vs. average yearly earnings.
plot2 <- ggplot(
  data = df3,
  mapping = aes(x = uploads, y = promedio_yearly_earnings)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(title = "Tendencia entre \n Cargas y \n Ganancias Anuales") +
  theme_minimal()

# Scatter plot with a linear fit: subscribers vs. average yearly earnings
# (column `promedio_yearly_earnings`).
plot3 <- ggplot(
  data = df3,
  mapping = aes(x = subscribers, y = promedio_yearly_earnings)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(title = "Tendencia entre \n Suscriptores y \n Ganancias Anuales") +
  theme_minimal()
# Lay the three scatter plots out on a two-column grid (ncol = 2), so the
# third plot lands on a second row.
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(plot1, plot2, plot3, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Mapa de Calor
# Carga las librerÃas
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
# Restrict df3 to the four numeric variables compared in the heat map.
df3_corr <- df3[c("promedio_yearly_earnings", "video.views", "uploads", "subscribers")]
# Pairwise correlation matrix of the selected variables.
corr_matrix <- cor(df3_corr)
# Draw the upper triangle as a white-to-dark-red heat map with the
# coefficients printed in each cell.
corrplot(
  corr_matrix,
  method = "color",
  type = "upper",
  col = colorRampPalette(c("white", "darkred"))(50),
  addCoef.col = "white",
  tl.cex = 0.7
)

Promedio de ganancias anuales por Categorías
# install.packages("plotly")
# install.packages("dplyr")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
# Drop the rows whose category is missing or the literal string "nan".
df3_filtered <- df3 %>%
  filter(!is.na(category) & category != "nan")
# Mean of the average yearly earnings per category, largest first.
categories <- df3_filtered %>%
  group_by(category) %>%
  summarise(promedio_yearly_earnings = mean(promedio_yearly_earnings)) %>%
  arrange(desc(promedio_yearly_earnings))
# Bar chart coloured by the mean earnings; the text shows the value in millions.
fig <- plot_ly(
  data = categories,
  x = ~reorder(category, promedio_yearly_earnings),
  y = ~promedio_yearly_earnings,
  type = "bar",
  marker = list(color = ~promedio_yearly_earnings, colorscale = "Reds"),
  text = ~paste(round(promedio_yearly_earnings / 1000000, 2), "M"),
  hoverinfo = "text",
  height = 500
)
# Layout. The axis title font is given as a nested list: the underscore form
# `titlefont_size` is plotly.py syntax and is not an attribute in the R API.
fig <- fig %>% layout(
  title = "Promedio de ganancia anual por Categoría",
  xaxis = list(
    title = list(text = "Categoría", font = list(size = 16)),
    categoryorder = "total descending"
  ),
  yaxis = list(
    title = list(text = "Promedio de ganancia anual", font = list(size = 16))
  ),
  showlegend = FALSE
)
# Display the chart.
fig
Nube de Palabras de los Youtubers con más Suscriptores
# Carga las librerÃas
library(wordcloud2)
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
# Strip from the channel names any bytes that are not valid UTF-8 (sub = "").
df$Youtuber <- iconv(df$Youtuber, from = "UTF-8", to = "UTF-8", sub = "")
# Build a tm corpus with one document per channel name.
corpus <- Corpus(VectorSource(df$Youtuber))
# Text clean-up pipeline; each step below emits the "transformation drops
# documents" warning shown in the rendered output.
# Step 1: lower-case everything.
corpus <- tm_map(corpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
# Step 2: remove punctuation.
corpus <- tm_map(corpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
# Step 3: remove digits.
corpus <- tm_map(corpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeNumbers): transformation drops
## documents
# Step 4: remove English stop words.
corpus <- tm_map(corpus, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, stopwords("english")):
## transformation drops documents
# Step 5: collapse runs of whitespace.
corpus <- tm_map(corpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, stripWhitespace): transformation drops
## documents
# Step 6: convert to UTF-8 once more, replacing unconvertible bytes with a space.
# NOTE(review): the names were already sanitised by the first iconv() call, so
# this step looks redundant - confirm before removing.
corpus <- tm_map(corpus, content_transformer(function(x) iconv(x, to = "UTF-8", sub =
" ")))
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(function(x) iconv(x,
## : transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(function(x) iconv(x,
## : transformation drops documents
# Pair every cleaned channel name with its subscriber count.
youtubers_data <- data.frame(
  Youtuber = sapply(corpus, as.character),
  Subscribers = df$subscribers
)
library(colorRamps)
# White-to-red gradient used to colour the words.
red_palette <- colorRampPalette(c("White", "red"))
# Font family handed to wordcloud2.
font <- "Arial Narrow "
# Render the word cloud on a black background with the chosen font.
wordcloud2(
  data = youtubers_data,
  color = red_palette(100),
  backgroundColor = "black",
  fontFamily = font
)
Los 10 Youtubers con más Suscriptores (Millones)
library(ggplot2)
# Pick the ten channels with the most subscribers by sorting explicitly,
# instead of trusting that df3 already happens to be in that order.
top10 <- head(df3[order(df3$subscribers, decreasing = TRUE), ], 10)
# Bar chart of the ten channels, ordered and shaded by subscriber count.
ggplot(top10, aes(x = reorder(Youtuber, -subscribers), y = subscribers, fill = subscribers)) +
  geom_col(width = 1.0) + # Bar width
  geom_text(aes(label = subscribers / 1000000), vjust = -0.5, size = 2.4) + # Labels in millions
  labs(title = "Los 10 Youtubers con más Suscriptores (Millones)", x = "Youtuber", y = "suscripciones") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # Tilt the x-axis labels
    plot.title = element_text(size = 10), # Title size
    plot.margin = margin(l = 1, r = 1, unit = "pt") # Left/right plot margins
  ) +
  scale_fill_gradient(low = "pink", high = "darkred") # Colour gradient of the bars

Porcentaje de Canales por Categoría
# Keep only the category column, as a one-column data frame.
df3_category <- df3[, "category", drop = FALSE]
# Remove the rows whose category is the literal string "nan".
df3_category_sin_nan <- df3_category[df3_category$category != "nan", , drop = FALSE]
# Inspect the structure of the filtered data frame.
str(df3_category_sin_nan)
## 'data.frame': 949 obs. of 1 variable:
## $ category: chr "Music" "Film & Animation" "Entertainment" "Education" ...
# Absolute frequency of each category.
category_counts <- table(df3_category_sin_nan$category)
# Share of each category as a percentage of all counted channels.
category_percentages <- category_counts / sum(category_counts) * 100
# One row per category with its count and percentage.
category_summary <- data.frame(
  category = names(category_counts),
  count = as.numeric(category_counts),
  percentage = as.numeric(category_percentages)
)
# Print the summary table.
category_summary
## category count percentage
## 1 Autos & Vehicles 2 0.2107482
## 2 Comedy 69 7.2708114
## 3 Education 45 4.7418335
## 4 Entertainment 241 25.3951528
## 5 Film & Animation 46 4.8472076
## 6 Gaming 94 9.9051633
## 7 Howto & Style 40 4.2149631
## 8 Movies 2 0.2107482
## 9 Music 202 21.2855638
## 10 News & Politics 26 2.7397260
## 11 Nonprofits & Activism 2 0.2107482
## 12 People & Blogs 132 13.9093783
## 13 Pets & Animals 4 0.4214963
## 14 Science & Technology 17 1.7913593
## 15 Shows 13 1.3698630
## 16 Sports 11 1.1591149
## 17 Trailers 2 0.2107482
## 18 Travel & Events 1 0.1053741
library(plotly)
library(dplyr)
# Frequency of each category.
category <- table(df3_category_sin_nan$category)
# Sort the categories by ascending frequency.
category <- sort(category)
# Pie chart with one slice per category, coloured along a green-to-red ramp.
pie_chart <- plot_ly(
  labels = names(category),
  values = category,
  type = "pie",
  marker = list(colors = colorRampPalette(c("green", "red"))(length(category)))
)
# Layout: the title string is written with a proper "í" (it was mis-encoded).
# NOTE(review): `template = 'plotly_white'` is the plotly.py way of naming a
# template; it presumably has no effect in the R API - confirm and drop.
pie_chart <- pie_chart %>% layout(
  title = "Canales por Categorías",
  uniformtext = list(minsize = 10, mode = 'hide'),
  template = 'plotly_white'
)
# Display the chart.
pie_chart
Los 10 Países con mayor cantidad de Youtubers
# Keep only the Country column, as a one-column data frame.
df3_Country <- df3[, "Country", drop = FALSE]
# Remove the rows whose Country is the literal string "nan".
df3_Country_sin_nan <- df3_Country[df3_Country$Country != "nan", , drop = FALSE]
# Inspect the structure of the filtered data frame.
str(df3_Country_sin_nan)
## 'data.frame': 873 obs. of 1 variable:
## $ Country: chr "India" "United States" "United States" "United States" ...
library(dplyr)
# Number of channels per country.
country_counts <- table(df3_Country_sin_nan$Country)
print(country_counts)
##
## Afghanistan Andorra Argentina
## 1 1 13
## Australia Bangladesh Barbados
## 9 1 1
## Brazil Canada Chile
## 62 15 3
## China Colombia Cuba
## 1 11 1
## Ecuador Egypt El Salvador
## 2 2 1
## Finland France Germany
## 1 5 6
## India Indonesia Iraq
## 168 28 2
## Italy Japan Jordan
## 2 5 3
## Kuwait Latvia Malaysia
## 1 1 1
## Mexico Morocco Netherlands
## 33 1 3
## Pakistan Peru Philippines
## 6 1 12
## Russia Samoa Saudi Arabia
## 16 1 9
## Singapore South Korea Spain
## 3 17 22
## Sweden Switzerland Thailand
## 4 1 18
## Turkey Ukraine United Arab Emirates
## 4 8 7
## United Kingdom United States Venezuela
## 43 313 1
## Vietnam
## 3
# Sort the counts in decreasing order and keep the first ten countries.
top_10_countries <- head(sort(country_counts, decreasing = TRUE), 10)
# Print the ten largest counts.
print(top_10_countries)
##
## United States India Brazil United Kingdom Mexico
## 313 168 62 43 33
## Indonesia Spain Thailand South Korea Russia
## 28 22 18 17 16
library(plotly)
# Funnel chart of the ten countries with the most channels.
fig <- plot_ly(
  type = "funnel",
  x = top_10_countries,
  textinfo = "value+text",
  text = names(top_10_countries),
  # textfont belongs to the trace; layout() has no such attribute and warned
  # about it, so the font size is set here instead.
  textfont = list(size = 12),
  marker = list(
    color = colorRampPalette(c("red", "pink"))(length(top_10_countries))
  )
)
# Layout: the title string is written with a proper "í" (it was mis-encoded).
# NOTE(review): the R package also warns about funnelmode/funnelgap, but
# plotly.js documents both as layout attributes, so they are kept - confirm.
# NOTE(review): `template = 'plotly_white'` is plotly.py syntax; it presumably
# has no effect in the R API - confirm.
fig <- fig %>% layout(
  title = "Top 10 de Países con más de Youtubers",
  template = 'plotly_white',
  funnelmode = "stack",
  funnelgap = 0.2
)
# Display the chart.
fig
## Warning: 'layout' objects don't have these attributes: 'funnelmode', 'funnelgap', 'textfont'
## Valid attributes include:
## '_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
Youtubers en el Mundo
library(dplyr)
library(plotly)
# Number of channels per country.
conteos_por_pais <- df3_Country_sin_nan %>%
  count(Country, name = "Contar_por_pais")
# Attach the per-country count to every row of the country data frame.
df3_Country_sin_nan_2 <- left_join(df3_Country_sin_nan, conteos_por_pais, by = "Country")
# Keep a single row per country.
df3_Country_sin_nan_2 <- df3_Country_sin_nan_2 %>%
  distinct(Country, .keep_all = TRUE)
# Custom colour scale as a list of (position, colour) pairs. The pairs must be
# list elements: wrapping them in c() flattens everything into one character
# vector, which is not a colour scale.
custom_colorscale <- list(
  list(0, "#115f9a"),
  list(0.05, "#1984c5"),
  list(0.1, "#22a7f0"),
  list(0.2, "#48b5c4"),
  list(0.4, "#76c68f"),
  list(0.6, "#a6d75b"),
  list(0.7, "#c9e52f"),
  list(0.8, "#d0ee11"),
  list(1, "#f4f100")
)
# Choropleth map shaded by the number of channels per country.
# height/width are set here because specifying them in layout() is deprecated;
# the trace has no `title` attribute, so the title is set only in layout().
fig <- plot_ly(
  data = df3_Country_sin_nan_2,
  type = "choropleth",
  locations = ~Country,
  locationmode = "country names",
  z = ~Contar_por_pais,
  colorscale = custom_colorscale,
  hoverinfo = "text",
  text = ~paste("Nº Canales: ", Contar_por_pais),
  height = 630,
  width = 1100
)
# Layout: the title font goes inside the title list (`title_font` is not a
# layout attribute), followed by the map styling.
fig <- fig %>% layout(
  title = list(
    text = "Distribución de Canales en el Mundo",
    x = 0.3,
    font = list(size = 18)
  ),
  geo = list(
    showcoastlines = TRUE,
    coastlinecolor = "Black",
    showland = TRUE,
    landcolor = "LightGray",
    showocean = TRUE,
    oceancolor = "LightBlue"
  )
)
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
# Muestra el gráfico
fig
## Warning: 'layout' objects don't have these attributes: 'title_font'
## Valid attributes include:
## '_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'
## Warning: 'choropleth' objects don't have these attributes: 'title'
## Valid attributes include:
## 'autocolorscale', 'coloraxis', 'colorbar', 'colorscale', 'customdata', 'customdatasrc', 'featureidkey', 'geo', 'geojson', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'legendgroup', 'legendgrouptitle', 'legendrank', 'locationmode', 'locations', 'locationssrc', 'marker', 'meta', 'metasrc', 'name', 'reversescale', 'selected', 'selectedpoints', 'showlegend', 'showscale', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'z', 'zauto', 'zmax', 'zmid', 'zmin', 'zsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Promedio de Ganancias anuales por País
library(plotly)
library(dplyr)
# Drop the rows whose Country is missing or the literal string "nan".
df3_filtered <- df3 %>%
  filter(!is.na(Country) & Country != "nan")
# Mean of the average yearly earnings per country, largest first.
Country <- df3_filtered %>%
  group_by(Country) %>%
  summarise(promedio_yearly_earnings = mean(promedio_yearly_earnings)) %>%
  arrange(desc(promedio_yearly_earnings))
# Bar chart coloured by the mean earnings; the text shows the country and the
# value in millions.
fig <- plot_ly(
  data = Country,
  x = ~reorder(Country, promedio_yearly_earnings),
  y = ~promedio_yearly_earnings,
  type = "bar",
  marker = list(color = ~promedio_yearly_earnings, colorscale = "Reds"),
  text = ~paste(Country, round(promedio_yearly_earnings / 1000000, 2), "M"),
  hoverinfo = "text",
  height = 500
)
# Layout. The axis title font is given as a nested list: the underscore form
# `titlefont_size` is plotly.py syntax and is not an attribute in the R API.
fig <- fig %>% layout(
  title = "Ganancia anual promedio por país",
  xaxis = list(
    title = list(text = "País", font = list(size = 16)),
    categoryorder = "total descending"
  ),
  yaxis = list(
    title = list(text = "Ganancia anual promedio", font = list(size = 16))
  ),
  showlegend = FALSE
)
# Display the chart.
fig
Ganancias Anuales Promedio de Youtubers por Suscriptores
library(dplyr)
# Work on a copy of df3 for the earnings ranking.
df3_youtuber_ganancia <- df3
# Keep the ten rows with the highest average yearly earnings.
# NOTE(review): top_n() is superseded by slice_max(); switching would also
# reorder the rows, so it is left as is here.
df3_top_10 <- top_n(df3_youtuber_ganancia, 10, promedio_yearly_earnings)
# Print the selected rows.
df3_top_10
## rank Youtuber subscribers video.views
## 1 1 T-Series 245000000 228000000000
## 2 4 Cocomelon - Nursery Rhymes 162000000 164000000000
## 3 5 SET India 159000000 148000000000
## 4 16 Sony SAB 83000000 101000000000
## 5 22 Zee TV 70500000 73139054467
## 6 84 GR6 EXPLODE 38900000 25154232306
## 7 140 StarPlus 32000000 26800674545
## 8 303 KL BRO Biju Rithvik 22800000 17988347989
## 9 418 DaFuq!?Boom! 19600000 7906181776
## 10 496 \xfd\xfd\xfd\xfd\xfd\xfd\xfd\xfd\xfd KIMPRO 17700000 19206701832
## category Title uploads
## 1 Music T-Series 20082
## 2 Education Cocomelon - Nursery Rhymes 966
## 3 Shows SET India 116536
## 4 Shows Sony SAB 71270
## 5 Entertainment Zee TV 129204
## 6 Music GR6 EXPLODE 3043
## 7 Entertainment StarPlus 44892
## 8 Entertainment KL BRO Biju Rithvik 1841
## 9 Film & Animation DaFuq!?Boom! 214
## 10 nan \xfd\xfd\xfd\xfd\xfd\xfd\xfd\xfd\xfd KIMPRO 1646
## Country Abbreviation channel_type lowest_monthly_earnings
## 1 India IN Music 564600
## 2 United States US Education 493800
## 3 India IN Entertainment 455900
## 4 India IN Entertainment 414300
## 5 India IN Entertainment 426800
## 6 Brazil BR Music 408700
## 7 India IN Entertainment 416800
## 8 nan nan Entertainment 508100
## 9 United States US Entertainment 576000
## 10 South Korea KR People 850900
## highest_monthly_earnings lowest_yearly_earnings highest_yearly_earnings
## 1 9000000 6800000 108400000
## 2 7900000 5900000 94800000
## 3 7300000 5500000 87500000
## 4 6600000 5000000 79600000
## 5 6800000 5100000 81900000
## 6 6500000 4900000 78500000
## 7 6700000 5000000 80000000
## 8 8100000 6100000 97600000
## 9 9200000 6900000 110600000
## 10 13600000 10200000 163400000
## created_month promedio_yearly_earnings
## 1 Mar 57600000
## 2 Sep 50350000
## 3 Sep 46500000
## 4 Aug 42300000
## 5 Dec 43500000
## 6 Aug 41700000
## 7 May 42500000
## 8 Jul 51850000
## 9 Jun 58750000
## 10 Nov 86800000
# Strip from the channel names any bytes that are not valid UTF-8.
df3_top_10[["Youtuber"]] <- iconv(
  df3_top_10[["Youtuber"]],
  from = "UTF-8",
  to = "UTF-8",
  sub = ""
)
library(ggplot2)
# Bars show the subscriber count; the fill colour and the label above each
# bar show the average yearly earnings in millions.
ggplot(
  data = df3_top_10,
  mapping = aes(x = Youtuber, y = subscribers, fill = promedio_yearly_earnings)
) +
  geom_col() +
  geom_text(
    aes(label = paste0("$", promedio_yearly_earnings / 1000000, "M")),
    vjust = -0.5,
    size = 2.4 # Label text size
  ) +
  scale_fill_gradient(low = "lightcoral", high = "darkred") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Ganancias Anuales Promedio de Youtubers por Suscriptores",
    x = "Youtuber",
    y = "Subscriptores"
  )
